Quantitative Finance Model by Kirk McGraw
The goal of this project is to build a quantitative trading algorithm using industry techniques as well as methods discussed in Harvard's CSCI-278 Applied Quantitative Finance. I've decided to implement a pairs trading mean-reversion strategy: we identify cointegrated pairs of stocks that are currently experiencing a temporary deviation, measured with a z-score, then go short or long based on the direction of the z-score and exit the trade when the pair reverts to its historical relationship. This strategy carries multiple caveats and risks. Some are foundational, such as a fundamental breakdown in a pair's cointegration, while others can be mitigated, for example by using market beta to hedge our exposure. To start, we'll engineer some technical features to use as essential market indicators, giving our model a grasp of the S&P 500's volatility, returns, and other aspects of market state. Not every feature will be used in the full analysis, but they offer additional avenues for exploration and alternative model parameters.
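As a preview of the mechanics, here is a minimal sketch of the classic fixed-threshold version of the entry/exit rule; the machine learning models built later replace the fixed entry threshold with a learned probability. The z argument is assumed to be a precomputed rolling z-score Series (a hypothetical input; we won't actually compute one until later).

import pandas as pd

def threshold_signals(z, entry=1.5, exit_level=0.25):
    # Classic rule: short the spread when the z-score is stretched high,
    # go long when stretched low, and flatten once it reverts toward zero.
    position = pd.Series(0.0, index=z.index)
    current = 0
    for date, score in z.items():
        if current == 0:
            if score > entry:
                current = -1   # short the spread
            elif score < -entry:
                current = 1    # long the spread
        elif abs(score) < exit_level:
            current = 0        # spread has reverted; exit the trade
        position[date] = current
    return position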

In [ ]:
import yfinance as yf
import pandas as pd
import numpy as np
from IPython.display import display
from datetime import timedelta 
import matplotlib.pyplot as plt

def create_technical_features(tickers=None,
                              start_date="2015-06-01",
                              end_date="2025-06-01",
                              ma_windows=[5, 20, 50],
                              ema_windows=[20, 50],
                              rsi_window=14,
                              beta_window=50):
    """
    Downloads stock data and engineers a variety of technical features.

    Args:
        tickers: A DataFrame with a 'Symbol' column of tickers to download from Yahoo Finance; defaults to the current S&P 500 constituents from Wikipedia.
        start_date: The start date for the data in 'YYYY-MM-DD'.
        end_date: The end date for the data in 'YYYY-MM-DD'.
        ma_windows: A list of window lengths for simple moving averages.
        ema_windows: A list of window lengths for calculating the exponential moving average.
        rsi_window: The window for computing the Relative Strength Index (RSI).
        beta_window: The window for calculating the rolling stock beta.

    Returns:
        A DataFrame of stock features, indexed by Date and Ticker for ease of use.
    """

    if tickers is None:
        # Resolve the default here so the Wikipedia request happens at call time, not at import time
        tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]

    longest_window = max(ma_windows + ema_windows + [rsi_window, 26, 12]) # MACD uses 26- and 12-day EMAs
    warmup_days = longest_window + 60 # Buffer for weekends, holidays, and indicator warm-up
    fetch_start_date = (pd.to_datetime(start_date) - timedelta(days=warmup_days)).strftime('%Y-%m-%d')
    
    print(f"Longest window is {longest_window} days. Fetching data from {fetch_start_date}")
    spy = pd.DataFrame({'Symbol':['SPY']})
    tickers = pd.concat([tickers,spy])
    stock_data = yf.download(tickers['Symbol'].tolist(), start=fetch_start_date, end=end_date, progress=False, auto_adjust=True)

    data = stock_data.stack(level=1, future_stack=True).rename_axis(['Date', 'Ticker']) # Rename axis for clarity
    features = pd.DataFrame(index=data.index)
    
    print("Creating price and volume features")
    features['intraday_return'] = data['Close'] / data['Open'] - 1
    features['overnight_gap'] = data['Open'] / data.groupby(level='Ticker')['Close'].shift(1) - 1
    # Use log1p so zero-volume days are handled safely
    features['log_volume'] = data.groupby(level='Ticker')['Volume'].transform(lambda x: np.log1p(x))

    print("Creating moving average features")
    for window in ma_windows:
        features[f'vol_ma_{window}'] = data.groupby(level='Ticker')['Volume'].transform(lambda x: x.rolling(window).mean())
        features[f'close_ma_{window}'] = data.groupby(level='Ticker')['Close'].transform(lambda x: x.rolling(window).mean())
        features[f'price_vs_ma_{window}'] = data['Close'] / features[f'close_ma_{window}'] - 1

    for window in ema_windows:
        features[f'close_ema_{window}'] = data.groupby(level='Ticker')['Close'].transform(lambda x: x.ewm(span=window, adjust=False).mean())
        features[f'price_vs_ema_{window}'] = data['Close'] / features[f'close_ema_{window}'] - 1

    print("Creating volatility and oscillator features")
    high_low = data['High'] - data['Low']
    high_prev_close = np.abs(data['High'] - data.groupby(level='Ticker')['Close'].shift(1))
    low_prev_close = np.abs(data['Low'] - data.groupby(level='Ticker')['Close'].shift(1))

    #Calculate the ATR by smoothing the true range with an exponential moving average, weighting recent information more heavily
    true_range = pd.concat([high_low, high_prev_close, low_prev_close], axis=1).max(axis=1)
    features['atr'] = true_range.groupby(level='Ticker').transform(lambda x: x.ewm(span=ema_windows[-1], adjust=False).mean())

    print("Calculating rolling beta against SPY")
    returns = data['Close'].unstack().pct_change()
    market_returns = returns['SPY']
    #calculate beta for a single stock's returns
    def calculate_rolling_beta(stock_returns, market_returns, window):
        #calculate rolling covariance between a stock and the market
        rolling_cov = stock_returns.rolling(window=window).cov(market_returns)
        #calculate rolling variance
        rolling_var = market_returns.rolling(window=window).var()

        return rolling_cov / rolling_var

    #Apply the function to each stock, but don't forget to exclude SPY
    beta_series = returns.drop('SPY', axis=1).apply(lambda x: calculate_rolling_beta(x, market_returns, beta_window))
    
    #Stack to match the features DataFrame's index
    features['beta'] = beta_series.stack()
    
    #Relative Strength Index, RSI
    def rsi(close, window):
        delta = close.diff()
        gain = (delta.where(delta > 0, 0)).ewm(alpha=1/window, adjust=False).mean() # Smoothed
        loss = (-delta.where(delta < 0, 0)).ewm(alpha=1/window, adjust=False).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    features['rsi'] = data.groupby(level='Ticker')['Close'].transform(lambda x: rsi(x, rsi_window))


    #Moving Average Convergence Divergence (MACD)
    def macd(close):
        ema_12 = close.ewm(span=12, adjust=False).mean()
        ema_26 = close.ewm(span=26, adjust=False).mean()
        return ema_12 - ema_26
    
    features['macd'] = data.groupby(level='Ticker')['Close'].transform(macd)
    features['macd_signal'] = features.groupby(level='Ticker')['macd'].transform(lambda x: x.ewm(span=9, adjust=False).mean())
    
    features = features.drop('SPY', level='Ticker', errors='ignore')
    features = features.dropna(how='all')
    print("Feature engineering finished.")
    
    return features.loc[start_date:]

technical_features = create_technical_features()
print("\nGenerated features dataframe:")
display(technical_features.tail())
print("\nExample dataframe of one ticker:")
display(technical_features.loc[(slice(None), 'NVDA'), :].head())
Longest window is 50 days. Fetching data from 2015-02-11

2 Failed downloads:
['BF.B']: YFPricesMissingError('possibly delisted; no price data found  (1d 2015-02-11 -> 2025-06-01)')
['BRK.B']: YFTzMissingError('possibly delisted; no timezone found')
Creating price and volume features
Creating moving average features
Creating volatility and oscillator features
Calculating rolling beta against SPY
Feature engineering finished.

Generated features dataframe:
intraday_return overnight_gap log_volume vol_ma_5 close_ma_5 price_vs_ma_5 vol_ma_20 close_ma_20 price_vs_ma_20 vol_ma_50 ... price_vs_ma_50 close_ema_20 price_vs_ema_20 close_ema_50 price_vs_ema_50 atr beta rsi macd macd_signal
Date Ticker
2025-05-30 XYZ 0.000486 -0.007078 16.256709 9460220.0 61.322001 0.006980 13996565.0 55.565500 0.111301 10588088.0 ... 0.105399 57.871444 0.067020 58.543680 0.054768 2.891990 1.300048 61.045201 1.696999 0.857835
YUM -0.000694 0.000000 14.973775 2004600.0 143.991998 -0.000361 1838925.0 146.058117 -0.014502 2302796.0 ... -0.027595 145.654768 -0.011773 146.527114 -0.017656 2.879334 0.379498 43.000200 -0.935830 -0.703460
ZBH -0.008285 -0.004285 15.069758 2731520.0 92.689636 -0.008178 3082335.0 94.478996 -0.026963 2449406.0 ... -0.086578 94.945031 -0.031739 98.713336 -0.068701 2.733337 0.349883 39.438192 -2.164977 -2.181889
ZBRA -0.000552 -0.008786 13.394070 444180.0 290.457996 -0.002369 581725.0 283.547498 0.021945 684846.0 ... 0.096115 282.861863 0.024422 279.868064 0.035381 10.182748 1.672413 57.538800 8.824800 9.284781
ZTS 0.010244 -0.001316 15.514387 2939440.0 165.448767 0.015831 3276110.0 160.661278 0.046102 2950692.0 ... 0.071416 161.316085 0.041855 159.492603 0.053767 4.290751 0.696194 63.200201 2.886123 2.149763

5 rows × 21 columns

Example dataframe of one ticker:
intraday_return overnight_gap log_volume vol_ma_5 close_ma_5 price_vs_ma_5 vol_ma_20 close_ma_20 price_vs_ma_20 vol_ma_50 ... price_vs_ma_50 close_ema_20 price_vs_ema_20 close_ema_50 price_vs_ema_50 atr beta rsi macd macd_signal
Date Ticker
2015-06-01 NVDA 0.004939 0.006326 19.736177 345509600.0 0.528651 0.024538 359851400.0 0.518258 0.045084 322590240.0 ... 0.029381 0.522126 0.037342 0.524375 0.032893 0.011786 1.183094 61.436413 0.000310 -0.003761
2015-06-02 NVDA -0.013489 -0.006256 19.228513 338424000.0 0.534508 -0.006611 361030600.0 0.517573 0.025892 317834640.0 ... 0.010464 0.522968 0.015309 0.524634 0.012086 0.011761 1.188037 54.395254 0.000923 -0.002824
2015-06-03 NVDA -0.013636 0.002735 19.340119 302115200.0 0.533830 -0.016230 358923600.0 0.517297 0.015212 314330160.0 ... 0.000245 0.523178 0.003801 0.524655 0.000975 0.011783 1.139949 50.964346 0.000929 -0.002074
2015-06-04 NVDA 0.024108 -0.005991 19.595365 301326400.0 0.533588 0.001905 359285000.0 0.517408 0.033236 314194800.0 ... 0.018416 0.524266 0.019720 0.525045 0.018208 0.011881 0.998495 55.838945 0.001677 -0.001323
2015-06-05 NVDA 0.010899 -0.003169 19.562736 296956800.0 0.534217 0.008426 348222800.0 0.517255 0.041496 309610160.0 ... 0.025008 0.525642 0.024877 0.525581 0.024997 0.011956 0.596188 57.807817 0.002571 -0.000545

5 rows × 21 columns

The technical features appear to be computing as expected, so now we'll move on to exploring cointegrated pairs, using the tried-and-true Engle-Granger two-step cointegration test from statsmodels. The function below tests every pair of stocks in our feature set, giving it a fairly expensive computational cost of 466 × (466 − 1) / 2 = 108,345 tests. This is definitely overkill, but it lets us sort the final output and find the most significantly cointegrated pairs, giving us plenty of options when building the pairs trading model. As an aside, the reason our "S&P 500" contains only 466 stocks is that Wikipedia's list of S&P 500 companies only includes current constituents, and any company without price data covering the full ten-year window (2015 to 2025 here) is excluded from our basket. This should be fine, though, as we still have plenty of companies to compare.
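For intuition, the Engle-Granger two-step procedure that coint wraps is roughly: regress one log-price series on the other, then run a unit-root test on the residuals. A minimal sketch of the idea follows; note that the real coint uses specialized critical values for residual-based tests, so a plain ADF p-value on the residuals (as below) is only an approximation.

import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

def engle_granger_sketch(series1, series2):
    # Step 1: OLS regression of one log-price series on the other
    model = sm.OLS(series2, sm.add_constant(series1)).fit()
    # Step 2: unit-root (ADF) test on the residuals; stationary residuals
    # imply the two series share a long-run equilibrium
    return adfuller(model.resid)[1]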

In [ ]:
from statsmodels.tsa.stattools import coint

def find_cointegrated_pairs(features_df, price_series='close_ma_20', p_value_threshold=0.001):
    """
    Finds cointegrated pairs of stocks from a large features DataFrame.

    Returns:
        A DataFrame with cointegrated pairs of stocks.
    """
    print("Preparing data for full cointegration analysis")
    log_prices = np.log(features_df[price_series])
    wide_log_prices = log_prices.unstack(level='Ticker').dropna(axis=1)
    
    tickers = wide_log_prices.columns
    n_tickers = len(tickers)
    print(f"Analyzing {n_tickers} tickers, which requires {n_tickers * (n_tickers - 1) // 2:,} pair tests.")

    results = []

    for i in range(n_tickers):
        for j in range(i + 1, n_tickers):
            series1 = wide_log_prices[tickers[i]]
            series2 = wide_log_prices[tickers[j]]
            
            p_value = coint(series1, series2)[1]
            
            if p_value < p_value_threshold:
                results.append({
                    'Ticker 1': tickers[i],
                    'Ticker 2': tickers[j],
                    'P-Value': p_value
                })

    print(f"\nAnalysis complete")
    
    if not results:
        print("No cointegrated pairs found")
        return pd.DataFrame()

    return pd.DataFrame(results).sort_values('P-Value')


#Run the cointegration analysis on the full S&P
all_significant_pairs = find_cointegrated_pairs(technical_features)

print(f"\nFound {len(all_significant_pairs)} cointegrated pairs")
print("Top 10 most significant pairs:")
display(all_significant_pairs.head(10))
Preparing data for full cointegration analysis
Analyzing 466 tickers, which requires 108,345 pair tests.

Analysis complete

Found 494 cointegrated pairs
Top 10 most significant pairs:
Ticker 1 Ticker 2 P-Value
302 GRMN ICE 3.237443e-07
150 CDNS LIN 3.904729e-07
407 LOW MPWR 2.483632e-06
200 DOV HD 3.126613e-06
76 AMGN DECK 3.210139e-06
176 CMI TMUS 4.510968e-06
344 HSIC SJM 4.926934e-06
85 AMGN PGR 6.002242e-06
396 KO WRB 6.307976e-06
387 KMX MDT 7.438441e-06
In [6]:
def plot_pair(features_df, ticker1, ticker2, price_series='close_ma_20'):
    """
    Plots the log-price series for a specific pair of tickers.
    """
    log_prices = np.log(features_df[price_series])
    pair_prices = log_prices.loc[(slice(None), [ticker1, ticker2])].unstack(level='Ticker')
    
    plt.style.use('seaborn-v0_8-pastel')
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(pair_prices.index, pair_prices[ticker1], label=ticker1)
    ax.plot(pair_prices.index, pair_prices[ticker2], label=ticker2)
    ax.set_title(f'Price Series: {ticker1} & {ticker2}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Log(Price)')
    ax.legend()
    plt.show()
top_pair = all_significant_pairs.iloc[1]
plot_pair(technical_features, top_pair['Ticker 1'], top_pair['Ticker 2'])
[Figure: log-price series of the selected pair]

I opted to use the 20-day simple moving average of the close as our price series, as it appears to be a fairly popular choice. The plot above shows that our analysis successfully identified cointegrated pairs across the last decade. Next we'll calculate the spread and z-score for our chosen stock pair, so that we can determine when to enter and exit the trade. The z-score is a form of standardization that keeps signals comparable across pairs by scaling the spread to a common unit, regardless of the stocks' price levels. We also estimate the hedge ratio, an essential tool for determining the ratio of long to short shares to hold. There are some essential diagnostics to run as well, like the Breusch-Pagan test for heteroscedasticity: constant error variance (homoscedasticity) is a core assumption of linear regression. This matters not only because we run a linear regression to estimate the hedge ratio, but also because we will later fit a logistic regression to decide when to enter the trade. Finally, we'll run an Augmented Dickey-Fuller test on the spread to ensure it is stationary. Stationarity is an assumption of the mean-reversion strategy we are implementing: a non-stationary spread may never revert to its mean, and without a long-term equilibrium the entire strategy is likely invalid.
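Concretely, with log prices and the OLS hedge ratio $\beta$ estimated in the regression, the quantities the code below computes are

$$s_t = \log P^{(2)}_t - \beta \,\log P^{(1)}_t, \qquad z_t = \frac{s_t - \mu_t}{\sigma_t},$$

where $\mu_t$ and $\sigma_t$ are the rolling mean and standard deviation of the spread over the z-score window.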

In [ ]:
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.tsa.stattools import adfuller


def calculate_spread_zscore(features_df, ticker1, ticker2, window=21):
    """
    Calculates the hedge ratio, spread, and z-score for a stock pair,
    and tests for heteroscedasticity and stationarity in the regression residuals.
    
    Args:
        features_df: The main features DataFrame.
        ticker1: The first ticker (independent variable, X).
        ticker2: The second ticker (dependent variable, Y).
        window: The rolling window for calculating the z-score.
        
    Returns:
        A DataFrame with log prices, spread, and z-score.
    """
    #find the log-prices for the pair
    df = np.log(features_df.loc[(slice(None), [ticker1, ticker2]), 'close_ma_20']).unstack()
    df = df.dropna()

    #Calculate the hedge ratio using a linear regression
    X = sm.add_constant(df[ticker1])
    model = sm.OLS(df[ticker2], X).fit()
    hedge_ratio = model.params.iloc[1]
    
    print(f'Cointegrated Stock Pair: {ticker1} vs {ticker2}')
    print(f"Hedge Ratio: {hedge_ratio:.4f}")
    
    #Calculate the spread
    spread = df[ticker2] - hedge_ratio * df[ticker1]
    
    #Calculate the Z-score of the spread
    mean_spread = spread.rolling(window=window).mean()
    std_spread = spread.rolling(window=window).std()
    df['z_score'] = (spread - mean_spread) / std_spread
    
    #Add spread to the DataFrame for plotting
    df['spread'] = spread
    

    #Heteroscedasticity Test
    #The test examines whether the variance of the regression errors is constant.
    bp_test = het_breuschpagan(model.resid, model.model.exog)

    print("\n> Breusch-Pagan Test for Heteroscedasticity <")
    print(f"Null Hypothesis: The variance of the regression errors is constant")
    print(f"P-Value: {bp_test[1]:.8f}")

    if bp_test[1] < 0.05:
        print("Result: Reject the null hypothesis. Pairs are Heteroscedastic. Do not use this cointegrated pair! \n")
    else:
        print("Result: Fail to reject the null hypothesis. Pairs are Homoscedastic. \n")

    #ADF Test
    adf_result = adfuller(df['spread'])
    adf_p_value = adf_result[1]
    print("> Augmented Dicky Fuller Test <")
    print(f"Null Hypothesis: The data is non-stationary and so it behaves like a random walk")
    print(f"P-Value: {adf_p_value:.8f}")

    if adf_p_value < 0.05:
        print(f"Result: P-Value is less than 0.05. We reject the null hypothesis. The spread is stationary. \n")
    else:
        print("Result: P-Value is greater than 0.05. We fail to reject the null hypothesis.  The spread may have a unit root and is non-stationary. Do not use this cointegrated pair! \n")

    return df[['spread', 'z_score']]

top_pair = all_significant_pairs.iloc[0]
spread_analysis_df = calculate_spread_zscore(technical_features, top_pair['Ticker 1'], top_pair['Ticker 2'])
display(spread_analysis_df.tail())
Cointegrated Stock Pair: GRMN vs ICE
Hedge Ratio: 0.6911

> Breusch-Pagan Test for Heteroscedasticity <
Null Hypothesis: The variance of the regression errors is constant
P-Value: 0.00000000
Result: Reject the null hypothesis. Pairs are Heteroscedastic. Do not use this cointegrated pair! 

> Augmented Dickey-Fuller Test <
Null Hypothesis: The data is non-stationary and so it behaves like a random walk
P-Value: 0.00000003
Result: P-Value is less than 0.05. We reject the null hypothesis. The spread is stationary. 

Ticker spread z_score
Date
2025-05-23 1.506151 1.472647
2025-05-27 1.509625 1.475048
2025-05-28 1.513292 1.497463
2025-05-29 1.513511 1.397256
2025-05-30 1.513239 1.288494

Our most significantly cointegrated pair unfortunately fails the Breusch-Pagan test: the residual variance is not constant, so we cannot trust the regression. It does pass the stationarity test, but we need both to continue. It's a good thing we checked! We'll go down our list of top 10 significant pairs until we find one that passes both tests.
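A minimal sketch of that scan, using a hypothetical helper that repeats the same regression and diagnostics as calculate_spread_zscore but returns just the two p-values:

def pair_diagnostics(features_df, ticker1, ticker2):
    # Same OLS hedge-ratio regression and spread construction as calculate_spread_zscore
    df = np.log(features_df.loc[(slice(None), [ticker1, ticker2]), 'close_ma_20']).unstack().dropna()
    model = sm.OLS(df[ticker2], sm.add_constant(df[ticker1])).fit()
    spread = df[ticker2] - model.params.iloc[1] * df[ticker1]
    return het_breuschpagan(model.resid, model.model.exog)[1], adfuller(spread)[1]

# Walk down the ranked pairs until one is both homoscedastic and stationary
for _, row in all_significant_pairs.head(10).iterrows():
    bp_p, adf_p = pair_diagnostics(technical_features, row['Ticker 1'], row['Ticker 2'])
    if bp_p >= 0.05 and adf_p < 0.05:
        print(f"First pair passing both tests: {row['Ticker 1']} / {row['Ticker 2']}")
        break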

In [10]:
top_pair = all_significant_pairs.iloc[6]
spread_analysis_df = calculate_spread_zscore(technical_features, top_pair['Ticker 1'], top_pair['Ticker 2'])
display(spread_analysis_df.tail())
Cointegrated Stock Pair: HSIC vs SJM
Hedge Ratio: 0.9302

> Breusch-Pagan Test for Heteroscedasticity <
Null Hypothesis: The variance of the regression errors is constant
P-Value: 0.41888239
Result: Fail to reject the null hypothesis. Pairs are Homoscedastic. 

> Augmented Dickey-Fuller Test <
Null Hypothesis: The data is non-stationary and so it behaves like a random walk
P-Value: 0.00006377
Result: P-Value is less than 0.05. We reject the null hypothesis. The spread is stationary. 

Ticker spread z_score
Date
2025-05-23 0.785017 -1.909664
2025-05-27 0.780018 -1.817814
2025-05-28 0.773734 -1.776940
2025-05-29 0.767870 -1.727366
2025-05-30 0.764173 -1.633421

At this point we have our features, our z-score spread, and validation that our statistical assumptions hold. Next we'll build a machine learning model using logistic regression to determine when to enter a trade. Historically, this strategy might have been triggered simply by the z-score passing a fixed threshold. Instead, we'll train a model to decide entry based on the z-score and the ATR as a measure of volatility. The model predicts the probability that the z-score will return to zero within the trade-horizon window; if that probability is above our (admittedly arbitrary) 60% threshold, it signals a trade. Two pitfalls we hope to avoid are overfitting the model and data leakage, i.e. letting information from the test period contaminate the training set. On examination, my first implementation of this function did appear to leak data, which was hopefully fixed in this iteration.
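As an extra guard against leakage, a walk-forward scheme such as scikit-learn's TimeSeriesSplit evaluates the model on several consecutive out-of-sample windows instead of the single chronological split used below. A minimal sketch, assuming a feature matrix X and label Series Y already ordered by date:

from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

def walk_forward_f1(X, Y, n_splits=5):
    # Each fold trains strictly on the past and tests on the period
    # immediately after it, so no future information reaches the model
    scores = []
    for train_idx, test_idx in TimeSeriesSplit(n_splits=n_splits).split(X):
        model = LogisticRegression(class_weight='balanced')
        model.fit(X.iloc[train_idx], Y.iloc[train_idx])
        scores.append(f1_score(Y.iloc[test_idx], model.predict(X.iloc[test_idx]), zero_division=0))
    return np.mean(scores)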

In [ ]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

def decide_trade_with_logistic_regression(spread_df, features_df, ticker1, ticker2, trade_horizon=15):
    
    pair_atr = features_df.loc[(slice(None), [ticker1, ticker2]), 'atr'].unstack()
    avg_atr = pair_atr.mean(axis=1)
    model_data = spread_df.join(avg_atr.rename('atr'))
    model_data.dropna(inplace=True)


    z_scores = model_data['z_score']
    future_outcome = z_scores.rolling(window=trade_horizon).apply(lambda x: 1 if np.sign(x.iloc[0]) != np.sign(x.iloc[-1]) else 0, raw=False)
    
    #shift label back to align the future outcome with today's features.
    model_data['trade_outcome'] = future_outcome.shift(-trade_horizon)

    entry_condition = (model_data['z_score'] > 1.5) | (model_data['z_score'] < -1.5)
    final_df = model_data[entry_condition].copy()
    
    #Drop NaNs created by shifting the label. These are the most recent data points for which the future outcome is not yet known to prevent data leakage!
    final_df.dropna(subset=['trade_outcome'], inplace=True)
    final_df['trade_outcome'] = final_df['trade_outcome'].astype(int)

    if len(final_df) < 50:
        print("Not enough historical trading opportunities to build a reliable model.")
        return "Not enough data"

    #Model Training
    X = final_df[['z_score', 'atr']]
    Y = final_df['trade_outcome']

    #Chronological split: shuffle must be False so no future rows leak into the training set (another source of data leakage)
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, shuffle=False)

    model = LogisticRegression(class_weight='balanced')
    model.fit(X_train, y_train)

    print("Model Evaluation on Test Data")
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions, zero_division=0))
    print("\n")

    #Make a Prediction for the most recent point
    last_point = X.iloc[[-1]]
    prediction = model.predict(last_point)[0]
    proba = model.predict_proba(last_point)[0][1]

    all_predictions = model.predict(X)
    final_df['model_decision'] = np.where(all_predictions == 1, 'Trade', 'No Trade')
    print(f"Signal for {last_point.index[0].date()}:")
    print(f"Z-Score: {last_point['z_score'].values[0]:.2f}, ATR: {last_point['atr'].values[0]:.4f}")
    print(f"Model's predicted probability of success for the most recent data point: {proba:.2%}")

    threshold = 0.6
    if prediction == 1 and proba > threshold:
        print(f"Probability over threshold: {threshold}, Decision: Enter Position!")
        return final_df
    else:
        print(f"Probability under threshold: {threshold}, Decision: Don't enter Position.")
        return final_df

trade_signal = decide_trade_with_logistic_regression(spread_analysis_df, technical_features, top_pair['Ticker 1'], top_pair['Ticker 2'], trade_horizon=15)

#Plots the z-score and overlays the model's trade decisions.
trade_signals = trade_signal[trade_signal['model_decision'] == 'Trade']
no_trade_signals = trade_signal[trade_signal['model_decision'] == 'No Trade']

plt.style.use('seaborn-v0_8-pastel')
fig, ax = plt.subplots(figsize=(15, 7))

#Plot the z-score entry opportunities and the model's decision
ax.scatter(no_trade_signals.index, no_trade_signals['z_score'], 
            marker='v', label='Signal: No Trade', s=80, alpha=0.7)
ax.scatter(trade_signals.index, trade_signals['z_score'], 
            marker='^', label='Signal: Trade', s=100, alpha=0.9)


ax.axhline(1.5, color='gray', linestyle='--', linewidth=1.5, label='Entry Threshold (1.5)')
ax.axhline(-1.5, color='gray', linestyle='--', linewidth=1.5)
ax.axhline(0, color='black', linestyle='-', linewidth=1.5, alpha=0.7, label='Mean (0.0)')

ax.set_title(f"Model Trade Decisions for {top_pair['Ticker 1']} & {top_pair['Ticker 2']}", fontsize=16)
ax.set_xlabel('Date', fontsize=10)
ax.set_ylabel('Z-Score', fontsize=10)
ax.legend()
plt.show()
Model Evaluation on Test Data
              precision    recall  f1-score   support

           0       0.70      0.95      0.81       262
           1       0.45      0.09      0.14       116

    accuracy                           0.69       378
   macro avg       0.58      0.52      0.48       378
weighted avg       0.63      0.69      0.61       378



Signal for 2025-04-25:
Z-Score: 1.53, ATR: 2.6802
Model's predicted probability of success for the most recent data point: 35.08%
Probability under threshold: 0.6, Decision: Don't enter Position.
[Figure: z-score entry opportunities with the logistic regression model's trade decisions overlaid]

This illustration shows the data points where the spread meets the 1.5 z-score threshold to be considered for trading. Despite many points making the cutoff, the model often opts not to enter a trade based on market volatility and the magnitude of the z-score, particularly for negative z-scores after 2019. The model only signals a trade when its estimated probability of success is at least 60%, so many threshold crossings don't make the cut. Reviewing the precision and recall, our numbers are good in some areas but, overall, quite unsatisfactory. For this reason, I'll try again with an ensemble model, a random forest. I'll also incorporate an additional feature, market beta, so the model can take the movement of the entire market into account.

In [18]:
from sklearn.ensemble import RandomForestClassifier

def decide_trade_with_random_forest(spread_df, features_df, ticker1, ticker2, trade_horizon=15):
    """
    Uses a Random Forest model with Z-Score, ATR, and Beta to classify a trading signal.
    """
    #find the features for the specific pair
    pair_atr = features_df.loc[(slice(None), [ticker1, ticker2]), 'atr'].unstack()
    pair_beta = features_df.loc[(slice(None), [ticker1, ticker2]), 'beta'].unstack()

    avg_atr = pair_atr.mean(axis=1)
    avg_beta = pair_beta.mean(axis=1)
    model_data = spread_df.join(avg_atr.rename('atr')).join(avg_beta.rename('beta'))
    model_data.dropna(inplace=True)


    z_scores = model_data['z_score']
    future_outcome = z_scores.rolling(window=trade_horizon).apply(lambda x: 1 if np.sign(x.iloc[0]) != np.sign(x.iloc[-1]) else 0, raw=False)
    
    #align the future outcome with today's features.
    model_data['trade_outcome'] = future_outcome.shift(-trade_horizon)

    entry_condition = (model_data['z_score'] > 1.5) | (model_data['z_score'] < -1.5)
    final_df = model_data[entry_condition].copy()
    
    #make sure to drop rows where the future outcome is not yet known.
    final_df.dropna(subset=['trade_outcome'], inplace=True)
    final_df['trade_outcome'] = final_df['trade_outcome'].astype(int)

    if len(final_df) < 50:
        print("Not enough historical trading opportunities to build a reliable model.")
        return "Not enough data"

    X = final_df[['z_score', 'atr', 'beta']]
    Y = final_df['trade_outcome']

    # Train and evaluate the model; chronological split (shuffle=False) so no future rows leak into training
    X_train, X_test, y_train, y_test = train_test_split(X.iloc[:-1], Y.iloc[:-1], test_size=0.3, shuffle=False)

    model = RandomForestClassifier(
        n_estimators=100,       
        max_depth=10,           #Max depth of each tree to reduce overfitting
        random_state=20,        
        class_weight='balanced'
    )
    model.fit(X_train, y_train)

    print("> Random Forest Model Evaluation on Test Data <")
    print(classification_report(y_test, model.predict(X_test), zero_division=0))
    print("\n")
    

    feature_importances = pd.DataFrame(model.feature_importances_, index=X.columns, columns=['importance']).sort_values('importance', ascending=False)
    print("> Feature Importances <")
    print(feature_importances)
    print("\n")

    #make a prediction for the most recent point
    last_point = X.iloc[[-1]]
    prediction = model.predict(last_point)[0]
    proba = model.predict_proba(last_point)[0][1]
    #return all predictions for model review
    all_predictions = model.predict(X)
    final_df['model_decision'] = np.where(all_predictions == 1, 'Trade', 'No Trade')

    print(f"Signal for {last_point.index[0].date()}:")
    print(f"Z-Score: {last_point['z_score'].values[0]:.2f}, ATR: {last_point['atr'].values[0]:.4f}, Beta: {last_point['beta'].values[0]:.2f}")
    print(f"Model's predicted probability of success for the final data point: {proba:.2%}")

    threshold = 0.6
    if prediction == 1 and proba > threshold:
        print(f"Probability over threshold: {threshold}, Enter Position!")
        return final_df
    else:
        print(f"Probability under threshold: {threshold}, Don't enter Position.")
        return final_df

trade_signal = decide_trade_with_random_forest(spread_analysis_df, technical_features, top_pair['Ticker 1'], top_pair['Ticker 2'])

#Plots the z-score and overlays the model's trade decisions.
trade_signals = trade_signal[trade_signal['model_decision'] == 'Trade']
no_trade_signals = trade_signal[trade_signal['model_decision'] == 'No Trade']

plt.style.use('seaborn-v0_8-pastel')
fig, ax = plt.subplots(figsize=(15, 7))

#Plot the z-score entry opportunities and the model's decision
ax.scatter(no_trade_signals.index, no_trade_signals['z_score'], 
            marker='v', label='Signal: No Trade', s=80, alpha=0.7)
ax.scatter(trade_signals.index, trade_signals['z_score'], 
            marker='^', label='Signal: Trade', s=100, alpha=0.9)

ax.axhline(1.5, color='gray', linestyle='--', linewidth=1.5, label='Entry Threshold (1.5)')
ax.axhline(-1.5, color='gray', linestyle='--', linewidth=1.5)
ax.axhline(0, color='black', linestyle='-', linewidth=1.5, alpha=0.7, label='Mean (0.0)')

ax.set_title(f"Model Trade Decisions for {top_pair['Ticker 1']} & {top_pair['Ticker 2']}", fontsize=16)
ax.set_xlabel('Date', fontsize=10)
ax.set_ylabel('Z-Score', fontsize=10)
ax.legend()
plt.show()
> Random Forest Model Evaluation on Test Data <
              precision    recall  f1-score   support

           0       0.73      0.86      0.79       263
           1       0.46      0.27      0.34       115

    accuracy                           0.68       378
   macro avg       0.60      0.57      0.57       378
weighted avg       0.65      0.68      0.65       378



> Feature Importances <
         importance
atr        0.377398
z_score    0.320294
beta       0.302308


Signal for 2025-04-25:
Z-Score: 1.53, ATR: 2.6802, Beta: 0.42
Model's predicted probability of success for the final data point: 18.02%
Probability under threshold: 0.6, Don't enter Position.
[Figure: z-score entry opportunities with the random forest model's trade decisions overlaid]

Comparing this plot to the logistic regression, the random forest is noticeably more discriminating, with a finer-grained ability to pick when to trade. Our precision and recall have also improved modestly, although ultimately both models choose not to trade on the most recent data. We could expand this program to run on many pairs at once, allowing us to hedge further with a portfolio diversified across industries. There is definite promise in what has been built here, but additional testing on other market parameters would be worthwhile. At least one more caveat is that real-world conditions have not been fully accounted for, especially transaction costs; the model would certainly need to undergo backtesting simulations with realistic costs included. Lastly, it's likely valuable to add further risk management. Although the current method uses tools like beta, we could also track statistics like the Sharpe ratio or build in a stop loss for when z-scores reach an excessive level; a sketch of both follows. Lag between data collection and trade execution might also need further consideration. As a whole, this model is a good starting point for a quantitative trading strategy that utilizes a variety of market metrics and aims for statistically sound quantitative analysis.
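As a closing illustration, a minimal sketch of those two risk checks. Both inputs are placeholders rather than outputs of the code above: strategy_returns would come from a backtest's daily P&L, and positions/z_scores from the signal logic.

def annualized_sharpe(strategy_returns, risk_free_rate=0.0, periods_per_year=252):
    # Annualized Sharpe ratio of a daily strategy-returns Series
    excess = strategy_returns - risk_free_rate / periods_per_year
    return np.sqrt(periods_per_year) * excess.mean() / excess.std()

def apply_stop_loss(z_scores, positions, stop_level=3.0):
    # Force positions flat whenever |z| blows out past the stop level, on the
    # theory that an extreme z-score may reflect a breakdown in cointegration
    return positions.where(z_scores.abs() < stop_level, 0)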